# -*- coding: utf-8 -*-
import scrapy,random,json,jsonpath,math
from xhs_spider.phone_agent import agent
from xhs_spider.items import XhsSpiderItem,NoteItem
import re
from xhs_spider.redisfilter import BloomFilter
from xhs_spider.randomseed import Randomuser
from xhs_spider.randomfilter import Filters,Comits


class XhsSpider(scrapy.Spider):
    """Crawl user profiles by walking follower / following lists.

    Starting from one seed user id, the spider requests that user's follower
    and following lists, requests the profile of every user id found there,
    emits an ``XhsSpiderItem`` for profiles with at least 2000 fans, and then
    repeats the process from each visited profile.
    """

    name = 'xhs'

    # URL templates; ``%s`` is replaced by a user id.  Each template carries
    # hard-coded ``deviceId``, ``sid``, ``t`` and ``sign`` query parameters.
    # Followers list endpoint.
    fans_url = 'https://www.xiaohongshu.com/api/sns/v1/user/%s/followers?platform=Android&deviceId=cd791be3-2df0-3f0c-a137-9b0a4c8a796a&device_fingerprint=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&device_fingerprint1=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&versionName=5.24.1&channel=xiaohongshu&sid=session.1221344937852575358&lang=zh-Hans&t=1539241892&sign=0ed193beaeec85a580052c1e99b884ed'
    # Followings list endpoint (attribute name kept as-is for compatibility).
    fllow = 'https://www.xiaohongshu.com/api/sns/v1/user/%s/followings?platform=Android&deviceId=cd791be3-2df0-3f0c-a137-9b0a4c8a796a&device_fingerprint=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&device_fingerprint1=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&versionName=5.24.1&channel=xiaohongshu&sid=session.1221344937852575358&lang=zh-Hans&t=1539241811&sign=e9ba1322f1484aa3cedc91e762a63c4d'
    # User profile endpoint.
    info_url = 'https://www.xiaohongshu.com/api/sns/v3/user/%s/info?platform=Android&deviceId=cd791be3-2df0-3f0c-a137-9b0a4c8a796a&device_fingerprint=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&device_fingerprint1=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&versionName=5.24.1&channel=xiaohongshu&sid=session.1221344937852575358&lang=zh-Hans&t=1539241928&sign=943b564ab81351ad5406e6a1045f5468'

    # Pattern matching characters outside the Basic Multilingual Plane
    # (used to strip emoji).  Compiled once instead of on every callback;
    # the fallback matches surrogate pairs where the wide range is rejected.
    try:
        _EMOJI_RE = re.compile(u'[\U00010000-\U0010ffff]')
    except re.error:
        _EMOJI_RE = re.compile(u'[\uD800-\uDBFF][\uDC00-\uDFFF]')

    @classmethod
    def _remove_emoji(cls, text):
        """Return *text* without emoji; ``None`` is treated as an empty string."""
        return cls._EMOJI_RE.sub('', text or '')

    @staticmethod
    def _to_int(value, default=0):
        """Convert *value* to ``int``, returning *default* if it cannot be converted."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return default

    def _load_data(self, response):
        """Return the ``data`` member of a JSON response, or ``None``.

        ``None`` is returned when the body is not valid JSON, when the
        top-level value is not an object, or when ``data`` is absent.
        """
        try:
            payload = json.loads(response.text)
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass.
            self.logger.warning('Non-JSON response from %s', response.url)
            return None
        if not isinstance(payload, dict):
            return None
        return payload.get('data')

    def start_requests(self):
        """Yield the follower and following list requests for the seed user."""
        # Seed user id supplied by the project's Randomuser helper.
        seed_uid = Randomuser().randomuserid()
        # Both list types are handled by the same callback.
        for template in (self.fans_url, self.fllow):
            yield scrapy.Request(template % seed_uid, callback=self.fans_list)

    def fans_list(self, response):
        """Yield one profile request per not-yet-seen user in a list response.

        The response is expected to be JSON whose ``data`` member is a list
        of objects carrying a ``userid`` key.  Malformed responses and
        entries without a user id are skipped instead of raising.
        """
        self.logger.debug(response.text)
        entries = self._load_data(response)
        if not isinstance(entries, list) or not entries:
            return

        # One filter per response is enough; it was rebuilt per entry before.
        seen = BloomFilter()
        for entry in entries:
            userid = entry.get('userid') if isinstance(entry, dict) else None
            if not userid:
                # Without an id the URL would be formatted with "None".
                continue
            profile_url = self.info_url % userid
            if seen.isContains(profile_url):
                self.logger.debug('exists')
                continue
            seen.insert(profile_url)
            yield scrapy.Request(profile_url, callback=self.fans_info)

    def fans_info(self, response):
        """Build an item from a profile response and continue the crawl.

        Yields the populated ``XhsSpiderItem`` when the user has at least
        2000 fans, followed by requests for that user's follower and
        following lists (issued regardless of the fan count).  Responses
        without a usable ``data`` object yield nothing.
        """
        user = self._load_data(response)
        if not isinstance(user, dict) or not user:
            return

        item = XhsSpiderItem()
        # Display name, emoji stripped.
        item["nickname"] = self._remove_emoji(user.get('nickname', ''))
        # Platform-specific user handle.
        item["red_id"] = user.get('red_id')
        # Fan count.
        item["fans"] = user.get('fans', '0')
        # Following count.
        item["follows"] = user.get('follows', '0')
        # Profile description, emoji stripped.
        item["describ"] = self._remove_emoji(user.get('desc', ''))
        # Gender: only the value 1 maps to '女'; every other value maps to '男'.
        item['gender'] = '女' if user.get('gender', '') == 1 else '男'
        # Combined "collected" and "liked" counters.
        item['collect_like'] = (
            self._to_int(user.get('collected', 0))
            + self._to_int(user.get('liked', 0))
        )
        # Number of notes.
        item['ndiscovery'] = user.get('ndiscovery', 0)
        # Profile thumbnail; the containing object may be absent or null.
        item["user_logo"] = (user.get('mini_program_info') or {}).get('thumb', '')

        userid = user.get('userid')
        item['userid'] = userid
        # Public profile page of the user.
        item['user_url'] = 'https://www.xiaohongshu.com/user/profile/{}'.format(userid)

        # Only accounts with at least 2000 fans are emitted.  The stored value
        # may be a string (see the default above), so compare numerically.
        if self._to_int(item['fans']) >= 2000:
            self.logger.info('-----该账号粉丝数量大于2000,正在抓取------')
            yield item

        if not userid:
            # Nothing to expand from; avoid requesting ".../user/None/...".
            return

        # Keep expanding the crawl from this user's own lists.
        yield scrapy.Request(self.fans_url % userid, callback=self.fans_list)
        yield scrapy.Request(self.fllow % userid, callback=self.fans_list)


        # 获得笔记的页数
        #     if item['ndiscovery'] != 0:
        #         page_nums = int(math.ceil(item['ndiscovery']/10))
        #         # 请求笔记的连接
        #         for page in range(1,page_nums+1):
        #             if page == 20:
        #                 node_url = 'https://www.xiaohongshu.com/api/sns/v2/note/user/{userid}?page=20&page_size=10&sub_tag_id=&platform=Android&deviceId=cd791be3-2df0-3f0c-a137-9b0a4c8a796a&device_fingerprint=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&device_fingerprint1=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&versionName=5.24.1&channel=xiaohongshu&sid=session.1221344937852575358&lang=zh-Hans&t=1536916930&sign=23da1ae31aa2e5b88edb25e3f7d7091b'.format(userid=userid)
        #                 yield scrapy.Request(node_url, callback=self.video_info,meta={'item':item})
        #             elif page == 19:
        #                 node_url = 'https://www.xiaohongshu.com/api/sns/v2/note/user/{userid}?page=19&page_size=10&sub_tag_id=&platform=Android&deviceId=cd791be3-2df0-3f0c-a137-9b0a4c8a796a&device_fingerprint=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&device_fingerprint1=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&versionName=5.24.1&channel=xiaohongshu&sid=session.1221344937852575358&lang=zh-Hans&t=1536916905&sign=eafbcb80d2c7ea2ac1f90c4ca6e0f7d2'.format(userid=userid)
        #                 yield scrapy.Request(node_url, callback=self.video_info,meta={'item':item})
        #             elif page == 18:
        #                 node_url = 'https://www.xiaohongshu.com/api/sns/v2/note/user/{userid}?page=18&page_size=10&sub_tag_id=&platform=Android&deviceId=cd791be3-2df0-3f0c-a137-9b0a4c8a796a&device_fingerprint=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&device_fingerprint1=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&versionName=5.24.1&channel=xiaohongshu&sid=session.1221344937852575358&lang=zh-Hans&t=1536916857&sign=92fcadcad26cb225fb5950bcb2f0b186'.format(userid=userid)
        #                 yield scrapy.Request(node_url, callback=self.video_info,meta={'item':item})
        #             elif page == 17:
        #                 node_url = 'https://www.xiaohongshu.com/api/sns/v2/note/user/{userid}?page=17&page_size=10&sub_tag_id=&platform=Android&deviceId=cd791be3-2df0-3f0c-a137-9b0a4c8a796a&device_fingerprint=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&device_fingerprint1=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&versionName=5.24.1&channel=xiaohongshu&sid=session.1221344937852575358&lang=zh-Hans&t=1536916793&sign=7e4912bc21efccba2812a601cf66f4fd'.format(userid=userid)
        #                 yield scrapy.Request(node_url, callback=self.video_info,meta={'item':item})
        #             elif page == 16:
        #                 node_url = 'https://www.xiaohongshu.com/api/sns/v2/note/user/{userid}?page=16&page_size=10&sub_tag_id=&platform=Android&deviceId=cd791be3-2df0-3f0c-a137-9b0a4c8a796a&device_fingerprint=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&device_fingerprint1=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&versionName=5.24.1&channel=xiaohongshu&sid=session.1221344937852575358&lang=zh-Hans&t=1536916738&sign=6c4443a205cde0de6694816a2fda3ba1'.format(userid=userid)
        #                 yield scrapy.Request(node_url, callback=self.video_info,meta={'item':item})
        #             elif page == 15:
        #                 node_url ='https://www.xiaohongshu.com/api/sns/v2/note/user/{userid}?page=15&page_size=10&sub_tag_id=&platform=Android&deviceId=cd791be3-2df0-3f0c-a137-9b0a4c8a796a&device_fingerprint=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&device_fingerprint1=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&versionName=5.24.1&channel=xiaohongshu&sid=session.1221344937852575358&lang=zh-Hans&t=1536916655&sign=73b0b5e424b7457f2f62e742c5ce5bb6'.format(userid=userid)
        #                 yield scrapy.Request(node_url, callback=self.video_info,meta={'item':item})
        #             elif page == 14:
        #                 node_url = 'https://www.xiaohongshu.com/api/sns/v2/note/user/{userid}?page=14&page_size=10&sub_tag_id=&platform=Android&deviceId=cd791be3-2df0-3f0c-a137-9b0a4c8a796a&device_fingerprint=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&device_fingerprint1=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&versionName=5.24.1&channel=xiaohongshu&sid=session.1221344937852575358&lang=zh-Hans&t=1536916590&sign=9463e885bd5ddf6500bb4afcd0fca8df'.format(userid=userid)
        #                 yield scrapy.Request(node_url, callback=self.video_info,meta={'item':item})
        #             elif page == 13:
        #                 node_url = 'https://www.xiaohongshu.com/api/sns/v2/note/user/{userid}?page=13&page_size=10&sub_tag_id=&platform=Android&deviceId=cd791be3-2df0-3f0c-a137-9b0a4c8a796a&device_fingerprint=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&device_fingerprint1=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&versionName=5.24.1&channel=xiaohongshu&sid=session.1221344937852575358&lang=zh-Hans&t=1536916550&sign=ec643f258603b4b01a8624bfbfa76376'.format(userid=userid)
        #                 yield scrapy.Request(node_url, callback=self.video_info,meta={'item':item})
        #             elif page == 12:
        #                 node_url = 'https://www.xiaohongshu.com/api/sns/v2/note/user/{userid}?page=12&page_size=10&sub_tag_id=&platform=Android&deviceId=cd791be3-2df0-3f0c-a137-9b0a4c8a796a&device_fingerprint=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&device_fingerprint1=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&versionName=5.24.1&channel=xiaohongshu&sid=session.1221344937852575358&lang=zh-Hans&t=1536916444&sign=0eedad6083685638a7bcefa98f7c9e14'.format(userid=userid)
        #                 yield scrapy.Request(node_url,callback=self.video_info,meta={'item':item})
        #             elif page == 11:
        #                 node_url = 'https://www.xiaohongshu.com/api/sns/v2/note/user/{userid}?page=11&page_size=10&sub_tag_id=&platform=Android&deviceId=cd791be3-2df0-3f0c-a137-9b0a4c8a796a&device_fingerprint=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&device_fingerprint1=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&versionName=5.24.1&channel=xiaohongshu&sid=session.1221344937852575358&lang=zh-Hans&t=1536916266&sign=a63c2ad40e54ea202167f7092e465b69'.format(userid=userid)
        #                 yield scrapy.Request(node_url, callback=self.video_info,meta={'item':item})
        #             elif page == 10:
        #                 node_url = 'https://www.xiaohongshu.com/api/sns/v2/note/user/{userid}?page=10&page_size=10&sub_tag_id=&platform=Android&deviceId=cd791be3-2df0-3f0c-a137-9b0a4c8a796a&device_fingerprint=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&device_fingerprint1=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&versionName=5.24.1&channel=xiaohongshu&sid=session.1221344937852575358&lang=zh-Hans&t=1536888737&sign=fe06b9602c0743e180a2de87d2a06bcb'.format(userid=userid)
        #                 yield scrapy.Request(node_url, callback=self.video_info,meta={'item':item})
        #             elif page == 9:
        #                 node_url = 'https://www.xiaohongshu.com/api/sns/v2/note/user/{userid}?page=9&page_size=10&sub_tag_id=&platform=Android&deviceId=cd791be3-2df0-3f0c-a137-9b0a4c8a796a&device_fingerprint=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&device_fingerprint1=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&versionName=5.24.1&channel=xiaohongshu&sid=session.1221344937852575358&lang=zh-Hans&t=1536888693&sign=d97c97312b4df86a662ee510c142d4ac'.format(userid=userid)
        #                 yield scrapy.Request(node_url, callback=self.video_info,meta={'item':item})
        #             elif page == 8:
        #                 node_url = 'https://www.xiaohongshu.com/api/sns/v2/note/user/{userid}?page=8&page_size=10&sub_tag_id=&platform=Android&deviceId=cd791be3-2df0-3f0c-a137-9b0a4c8a796a&device_fingerprint=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&device_fingerprint1=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&versionName=5.24.1&channel=xiaohongshu&sid=session.1221344937852575358&lang=zh-Hans&t=1536888651&sign=b2766dd89d0dd78db297d14ca407923a'.format(userid=userid)
        #                 yield scrapy.Request(node_url, callback=self.video_info,meta={'item':item})
        #             elif page == 7:
        #                 node_url = 'https://www.xiaohongshu.com/api/sns/v2/note/user/{userid}?page=7&page_size=10&sub_tag_id=&platform=Android&deviceId=cd791be3-2df0-3f0c-a137-9b0a4c8a796a&device_fingerprint=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&device_fingerprint1=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&versionName=5.24.1&channel=xiaohongshu&sid=session.1221344937852575358&lang=zh-Hans&t=1536888596&sign=ecd32a658048e780642009e6410da3df'.format(userid=userid)
        #                 yield scrapy.Request(node_url, callback=self.video_info,meta={'item':item})
        #             elif page == 6:
        #                 node_url = 'https://www.xiaohongshu.com/api/sns/v2/note/user/{userid}?page=6&page_size=10&sub_tag_id=&platform=Android&deviceId=cd791be3-2df0-3f0c-a137-9b0a4c8a796a&device_fingerprint=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&device_fingerprint1=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&versionName=5.24.1&channel=xiaohongshu&sid=session.1221344937852575358&lang=zh-Hans&t=1536888510&sign=3874b37580711c7dabb2420e6c9c5104'.format(userid=userid)
        #                 yield scrapy.Request(node_url, callback=self.video_info,meta={'item':item})
        #             elif page == 5:
        #                 node_url ='https://www.xiaohongshu.com/api/sns/v2/note/user/{userid}?page=5&page_size=10&sub_tag_id=&platform=Android&deviceId=cd791be3-2df0-3f0c-a137-9b0a4c8a796a&device_fingerprint=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&device_fingerprint1=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&versionName=5.24.1&channel=xiaohongshu&sid=session.1221344937852575358&lang=zh-Hans&t=1536888465&sign=b0585b1bb2bb47a0a887881964535de4'.format(userid=userid)
        #                 yield scrapy.Request(node_url, callback=self.video_info,meta={'item':item})
        #             elif page == 4:
        #                 node_url = 'https://www.xiaohongshu.com/api/sns/v2/note/user/{userid}?page=4&page_size=10&sub_tag_id=&platform=Android&deviceId=cd791be3-2df0-3f0c-a137-9b0a4c8a796a&device_fingerprint=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&device_fingerprint1=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&versionName=5.24.1&channel=xiaohongshu&sid=session.1221344937852575358&lang=zh-Hans&t=1536888410&sign=bb80a912df5e6ed1f1554eb2b9f7c10c'.format(userid=userid)
        #                 yield scrapy.Request(node_url, callback=self.video_info,meta={'item':item})
        #             elif page == 3:
        #                 node_url = 'https://www.xiaohongshu.com/api/sns/v2/note/user/{userid}?page=3&page_size=10&sub_tag_id=&platform=Android&deviceId=cd791be3-2df0-3f0c-a137-9b0a4c8a796a&device_fingerprint=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&device_fingerprint1=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&versionName=5.24.1&channel=xiaohongshu&sid=session.1221344937852575358&lang=zh-Hans&t=1536888281&sign=e587149b39167ac5a517a45b6f5eea64'.format(userid=userid)
        #                 yield scrapy.Request(node_url, callback=self.video_info,meta={'item':item})
        #             elif page == 2:
        #                 node_url = 'https://www.xiaohongshu.com/api/sns/v2/note/user/{userid}?page=2&page_size=10&sub_tag_id=&platform=Android&deviceId=cd791be3-2df0-3f0c-a137-9b0a4c8a796a&device_fingerprint=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&device_fingerprint1=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&versionName=5.24.1&channel=xiaohongshu&sid=session.1221344937852575358&lang=zh-Hans&t=1536888224&sign=aeb9fbe7f62c4de843b62534420cc8e0'.format(userid=userid)
        #                 yield scrapy.Request(node_url,callback=self.video_info,meta={'item':item})
        #             elif page == 1:
        #                 node_url = 'https://www.xiaohongshu.com/api/sns/v2/note/user/{userid}?page=1&page_size=10&sub_tag_id=&platform=Android&deviceId=cd791be3-2df0-3f0c-a137-9b0a4c8a796a&device_fingerprint=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&device_fingerprint1=201809121411034abaed68357e498f045ae5b48365dfea019f44c63e626860&versionName=5.24.1&channel=xiaohongshu&sid=session.1221344937852575358&lang=zh-Hans&t=1536888167&sign=3b8aac07e90ff900f90fb579ef6a636b'.format(userid=userid)
        #                 yield scrapy.Request(node_url, callback=self.video_info,meta={'item':item})

    # 视频详情
    # def video_info(self,response):
    #     item = response.meta['item']
    #     notes_list = json.loads(response.text)
    #     note_data_list = notes_list.get('data').get('notes')
    #     for note in note_data_list:
    #         noteitem = NoteItem()
    #         noteitem['note_userid'] = item['userid']
    #         noteitem["note_id"] = note.get('id')
    #         def remove_emoji(desstr, restr=''):
    #             '''
    #             过滤表情
    #             '''
    #             try:
    #                 co = re.compile(u'[\U00010000-\U0010ffff]')
    #             except re.error:
    #                 co = re.compile(u'[\uD800-\uDBFF][\uDC00-\uDFFF]')
    #             return co.sub(restr, desstr)
    #         noteitem["note_title"] = remove_emoji(note.get('title'))
    #         type = note.get('type')
    #         if type == 'normal':
    #             # 点赞链接
    #             comment_url = random.choice(Filters).format(note_id=noteitem["note_id"])
    #
    #
    #             yield scrapy.Request(comment_url, callback=self.comment_info, meta={'noteitem': noteitem,'item': item})
    #
    # # 点赞详情
    # def comment_info(self,response):
    #
    #     noteitem = response.meta['noteitem']
    #     item = response.meta['item']
    #     comment_list = json.loads(response.text)
    #
    #     comment_data = comment_list.get('data')
    #     if comment_data is None:
    #         return
    #     # 收藏数
    #     noteitem["fav_count"] = comment_data.get('fav_count',0)
    #     # 点赞数
    #     noteitem["likes"] = comment_data.get('likes',0)
    #
    #     # 评论数链接
    #     comment_nums_url = random.choice(Comits).format(note_id=noteitem["note_id"])
    #     yield scrapy.Request(comment_nums_url, callback=self.comment_num, meta={'noteitem': noteitem,'item': item})
    # # 评论详情
    # def comment_num(self,response):
    #     noteitem = response.meta['noteitem']
    #     item = response.meta['item']
    #     userid = item['userid']
    #     nums_list = json.loads(response.text)
    #     nums_data = nums_list.get('data')
    #     # 评论数量
    #     if nums_data is None:
    #         return
    #     noteitem["comment_count"] = nums_data.get('comment_count',0)
    #
    #     yield noteitem










